# -*- coding: utf-8 -*-
# @Date    : 2021/4/23
# @Author  : Maoxian

# 下载《诛仙》小说全章节
import os
import time

import requests
from lxml import etree
from urllib.parse import urljoin


# Fetch the book's metadata (title, author, last-update time, synopsis)
# and the full chapter list as (title, relative-url) pairs from the
# book's index page.
def get_book_info(url):
    # BUG FIX: the second positional parameter of requests.get is
    # `params`, not `headers` — the original call never sent the
    # User-Agent header. Pass it by keyword, and add a timeout so a
    # hung connection cannot block the script forever.
    r = requests.get(url, headers=headers, timeout=10)
    book_html = etree.HTML(r.content.decode('utf8'))
    chapters = book_html.xpath('//*[@id="list"]/dl/dd')

    return dict(
        name=book_html.xpath('//*[@id="info"]/h1/text()')[0],  # book title
        author=book_html.xpath('//*[@id="info"]/p[1]/text()')[0].split('：')[-1],  # author (text after full-width colon)
        update_time=book_html.xpath('//*[@id="info"]/p[3]/text()')[0].split('：')[-1],  # last update time
        profile=book_html.xpath('//*[@id="intro"]/p[2]/text()')[0],  # synopsis
        chapters=[(chapter.xpath('./a/text()')[0], chapter.xpath('./a/@href')[0]) for chapter in chapters]  # chapter list
    )


# Fetch one chapter page and return its body text with every run of
# whitespace removed, as a single continuous string.
def get_chapter(url):
    # BUG FIX: headers must be passed by keyword — positionally the
    # second argument of requests.get is `params`, so the User-Agent
    # was never sent. Also add a timeout for robustness.
    r = requests.get(url, headers=headers, timeout=10)
    tree = etree.HTML(r.content.decode('utf8'))
    # Join all text nodes of the content div, then strip whitespace
    # (newlines, &nbsp; artifacts) by splitting and re-joining.
    content = "".join(tree.xpath('//*[@id="content"]/text()'))
    # Removed the original `content.encode('utf8')` line: its return
    # value was discarded, making it a dead no-op.
    return "".join(content.split())


# Crawl every chapter of the book and save each one to
# <book name>/<chapter title>.txt. Failures are reported and skipped.
def get_book(book):
    # 1. Create the output folder; exist_ok avoids the check-then-create
    #    race of the original `if not exists: makedirs` pattern.
    os.makedirs(book['name'], exist_ok=True)

    # 2. Download each chapter and write it to disk.
    for title, url in book['chapters']:
        try:
            print(f'正在下载 - {title}: {url}')
            content = get_chapter(urljoin(base_url, url))
            with open(f"{book['name']}/{title}.txt", 'w', encoding='utf8') as f:
                f.write(content)
            time.sleep(0.5)  # be polite to the server between requests
        except Exception as e:
            # BUG FIX: the caught exception was bound but never shown,
            # hiding the failure cause; include it in the message.
            print(f'下载失败 -- {title}: {url} ({e})')


if __name__ == '__main__':
    # Present a desktop-browser User-Agent so the site serves normal pages.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/90.0.4430.72 Safari/537.36 Edg/90.0.818.42"
        )
    }

    # Site root (for resolving relative chapter links) and the index
    # page of the book to download.
    base_url = "http://www.xbiquge.la"
    book_url = "http://www.xbiquge.la/1/1693/"

    # Scrape the index page first, then fetch and store every chapter.
    book = get_book_info(book_url)
    get_book(book)
